# coding=utf-8
# html_parser.py
"""
Parse html code
:author: antony zhao
:date: 24/3/2020
"""

import logging
from bs4 import BeautifulSoup

# HtmlDownloader fetches the markup that the HtmlParser class below parses
from spider.html_downloader import HtmlDownloader


class HtmlParser(object):
    """Download a page and build a BeautifulSoup tree from its markup."""

    def __init__(self):
        # Starts out empty; the methods visible in this class never modify it.
        self.list_article = []

    def parse(self, url):
        """
        Download the page at ``url`` and parse it with BeautifulSoup
        :param url: handed unchanged to ``HtmlDownloader.download``
        :return: the BeautifulSoup object built from the downloaded markup
        """
        logging.debug("ready to parse html")
        markup = HtmlDownloader.download(url)
        # 'lxml' selects the parser backend; from_encoding is the encoding
        # hint given to BeautifulSoup for the downloaded markup.
        parsed = BeautifulSoup(
            markup,
            features='lxml',
            from_encoding='utf-8',
        )
        logging.debug("parse html finished")
        return parsed